import ddddocr
import requests
from lxml import etree
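# Source encoding: UTF-8 (the Python 3 default). Note that a PEP 263 "coding" declaration is only honored on line 1 or 2 of a file.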
def _hidden_field(tree, name, fallback):
    """Return the value of the form input named *name*, or *fallback* if absent."""
    values = tree.xpath('//input[@name="%s"]/@value' % name)
    return values[0] if values else fallback


def main():
    """Log in to so.gushiwen.cn with an OCR-solved captcha and save the collection page.

    Steps:
      1. Fetch the login page and locate the captcha image (``#imgCode``).
      2. Download the captcha, keep a copy in ``./coding.jpg`` and run ddddocr on it.
      3. POST the login form, then fetch the collection page and write it to
         ``./古诗文网.html``.

    Every request goes through one ``requests.Session`` so that cookies set
    while loading the login page and the captcha are sent with the login POST.

    Raises:
        SystemExit: if the captcha image cannot be found on the login page.
    """
    from urllib.parse import urljoin  # stdlib; only needed by this entry point

    page_url = 'https://so.gushiwen.cn/user/login.aspx?from=http://so.gushiwen.cn/user/collect.aspx'
    login_url = 'https://so.gushiwen.cn/user/login.aspx?from=http%3a%2f%2fso.gushiwen.cn%2fuser%2fcollect.aspx'
    detail_url = 'https://so.gushiwen.cn/user/collect.aspx'
    headers = {
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.54'
    }
    timeout = 10  # seconds; without it a stalled connection blocks forever

    with requests.Session() as session:
        session.headers.update(headers)

        # 1. Login page: source of the captcha URL and the hidden form fields.
        page_text = session.get(page_url, timeout=timeout).text
        tree = etree.HTML(page_text)

        captcha_src = tree.xpath('//*[@id="imgCode"]/@src')
        if not captcha_src:
            raise SystemExit('captcha image (#imgCode) not found on the login page')
        # urljoin copes with both relative and root-relative src values and
        # avoids the doubled slash that plain concatenation produced.
        img_url = urljoin(page_url, captcha_src[0])

        # 2. Captcha: fetched through the same session, since the expected
        #    code is presumably tied to the session cookie.
        img_data = session.get(img_url, timeout=timeout).content
        with open('./coding.jpg', 'wb') as fp:
            fp.write(img_data)  # kept on disk for manual inspection

        ocr = ddddocr.DdddOcr()
        result = ocr.classification(img_data)
        print(result)

        # 3. Login form. The hidden ASP.NET fields are read from the page just
        #    fetched; the previously captured values (with the stray spaces
        #    removed) are used only if the page does not provide them.
        # NOTE(review): credentials are hard-coded in source; consider moving
        # them to environment variables or a config file outside version control.
        data = {
            '__VIEWSTATE': _hidden_field(
                tree,
                '__VIEWSTATE',
                'KI8RXvYTPo443Yf47ysyb97h5fWBLM/7CJEqay1j0tpL11Plivmtq59MlRQedwdUPgFcn2HiurV0pb4S54HO2SXesIl+kjDDWtxGf78bp0eKadsaDWoAGq7hNJPaeILSN6j6u+n/MX4FxTEzxZO2UK30f9k=',
            ),
            '__VIEWSTATEGENERATOR': _hidden_field(tree, '__VIEWSTATEGENERATOR', 'C93BE1AE'),
            'from': 'http://so.gushiwen.cn/user/collect.aspx',
            'email': '2523810765@qq.com',
            'pwd': 'AAaa1234',
            'code': result,
            'denglu': '登录',
        }
        response = session.post(login_url, data=data, timeout=timeout)
        print(response.status_code)

        # 4. Page that requires login: fetched with the authenticated session.
        detail_page_text = session.get(detail_url, timeout=timeout).text

    with open('./古诗文网.html', 'w', encoding='utf-8') as fp:
        fp.write(detail_page_text)


if __name__ == '__main__':
    main()